In [1]:
import pandas as pd
import numpy as np
from PlateAnalysis_Triton import PlateDataset
import matplotlib.pyplot as plt
import re
plate1_t1 = PlateDataset('20191120_Triton_Dopt_plate_1.CSV')
plate2_t1 = PlateDataset('20191120_Triton_Dopt_plate_2.CSV')
plate1_t2 = PlateDataset('20191120_Triton_Dopt_plate_1_t2.CSV')
plate2_t2 = PlateDataset('20191120_Triton_Dopt_plate_2_t2.CSV')
# Keeping track of which ones I messed up
WrongWayRound = {1:False,2:False,3:False,4:False,
5:False,6:False,7:False,8:False,
9:False,10:False,11:False,12:False,
13:False, 14:False, 15:False, 16:False,
17:False,18:False, 19:False,20:False}
def PercentHighSpin(traces):
(103.3 *traces.loc[:,391]) - (42.8 * traces.loc[:,417])/\
traces.loc[:,391]*(103.3-54.5) - traces.loc[:,417]*(42.8-92.4)
def PlotTracesAndGetMetrics(plate,wrongwayround,i,plan, metrics):
if (i-1)%2==0:
idx = int(i/2)
else:
idx = int((i-1)/2)
row = plan.loc[idx,:]
print(f'{plate.plate_data}\t Trace {i}')
print(f"Protein conc = {round(row['Protein Conc'],2)},\
Vol = {round(row['Protein Vol'],2)}, Triton = {round(row['Triton'],2)}")
NormalizedTraces, DifferenceSpec, DiffDiff = plate.AnalysisPipeline_1(i,WrongWayRound[i])
plate.PlotTrace(NormalizedTraces,vol=row['Protein Vol'])
plate.PlotTrace(DifferenceSpec,vol=row['Protein Vol'])
plate.PlotMichaelesMenten(DiffDiff,row['Protein Vol'],'')
concs = plate.CalculateCompoundConcs(4,row['Protein Vol'],5)
km, vmax, loss = plate.FitMichaelisMenten(concs,DiffDiff)
Noise = DifferenceSpec.loc[:,405].std()
temp = pd.Series([km.item(), vmax.item(), loss.item(),Noise],index = ['Km','Vmax','Loss','Noise'])
temp = temp.append(row)
t = plate.metadata.iloc[0,2]
t = re.search(r'\d\d:\d\d:\d\d',t)[0]
temp['Time'] = t
temp = pd.DataFrame(temp).T
return temp
plan = pd.read_csv('Triton_Dopt.csv',index_col = 0)
metrics = pd.DataFrame([],columns=['Km', 'Vmax', 'Loss', 'Noise','Protein Conc', 'Protein Vol', 'Triton'])
for i in range(1,21):
metrics = metrics.append(PlotTracesAndGetMetrics(plate1_t1,False,i,plan,metrics),sort=True)
metrics = metrics.append(PlotTracesAndGetMetrics(plate2_t1,False,i,plan,metrics),sort=True)
metrics = metrics.append(PlotTracesAndGetMetrics(plate1_t2,False,i,plan,metrics),sort=True)
metrics = metrics.append(PlotTracesAndGetMetrics(plate2_t2,False,i,plan,metrics),sort=True)
20191120_Triton_Dopt_plate_1.CSV Trace 1 Protein conc = 10.0, Vol = 20.0, Triton = 0.01
20191120_Triton_Dopt_plate_2.CSV Trace 1 Protein conc = 10.0, Vol = 20.0, Triton = 0.01
20191120_Triton_Dopt_plate_1_t2.CSV Trace 1 Protein conc = 10.0, Vol = 20.0, Triton = 0.01
20191120_Triton_Dopt_plate_2_t2.CSV Trace 1 Protein conc = 10.0, Vol = 20.0, Triton = 0.01
20191120_Triton_Dopt_plate_1.CSV Trace 2 Protein conc = 10.0, Vol = 20.0, Triton = 0.01
20191120_Triton_Dopt_plate_2.CSV Trace 2 Protein conc = 10.0, Vol = 20.0, Triton = 0.01
20191120_Triton_Dopt_plate_1_t2.CSV Trace 2 Protein conc = 10.0, Vol = 20.0, Triton = 0.01
20191120_Triton_Dopt_plate_2_t2.CSV Trace 2 Protein conc = 10.0, Vol = 20.0, Triton = 0.01
20191120_Triton_Dopt_plate_1.CSV Trace 3 Protein conc = 10.0, Vol = 30.0, Triton = 0.1
20191120_Triton_Dopt_plate_2.CSV Trace 3 Protein conc = 10.0, Vol = 30.0, Triton = 0.1
20191120_Triton_Dopt_plate_1_t2.CSV Trace 3 Protein conc = 10.0, Vol = 30.0, Triton = 0.1
20191120_Triton_Dopt_plate_2_t2.CSV Trace 3 Protein conc = 10.0, Vol = 30.0, Triton = 0.1
20191120_Triton_Dopt_plate_1.CSV Trace 4 Protein conc = 10.0, Vol = 30.0, Triton = 0.1
20191120_Triton_Dopt_plate_2.CSV Trace 4 Protein conc = 10.0, Vol = 30.0, Triton = 0.1
20191120_Triton_Dopt_plate_1_t2.CSV Trace 4 Protein conc = 10.0, Vol = 30.0, Triton = 0.1
20191120_Triton_Dopt_plate_2_t2.CSV Trace 4 Protein conc = 10.0, Vol = 30.0, Triton = 0.1
20191120_Triton_Dopt_plate_1.CSV Trace 5 Protein conc = 10.0, Vol = 26.36, Triton = 0.04
20191120_Triton_Dopt_plate_2.CSV Trace 5 Protein conc = 10.0, Vol = 26.36, Triton = 0.04
20191120_Triton_Dopt_plate_1_t2.CSV Trace 5 Protein conc = 10.0, Vol = 26.36, Triton = 0.04
20191120_Triton_Dopt_plate_2_t2.CSV Trace 5 Protein conc = 10.0, Vol = 26.36, Triton = 0.04
20191120_Triton_Dopt_plate_1.CSV Trace 6 Protein conc = 10.0, Vol = 26.36, Triton = 0.04
20191120_Triton_Dopt_plate_2.CSV Trace 6 Protein conc = 10.0, Vol = 26.36, Triton = 0.04
20191120_Triton_Dopt_plate_1_t2.CSV Trace 6 Protein conc = 10.0, Vol = 26.36, Triton = 0.04
20191120_Triton_Dopt_plate_2_t2.CSV Trace 6 Protein conc = 10.0, Vol = 26.36, Triton = 0.04
20191120_Triton_Dopt_plate_1.CSV Trace 7 Protein conc = 10.0, Vol = 20.0, Triton = 0.1
20191120_Triton_Dopt_plate_2.CSV Trace 7 Protein conc = 10.0, Vol = 20.0, Triton = 0.1
20191120_Triton_Dopt_plate_1_t2.CSV Trace 7 Protein conc = 10.0, Vol = 20.0, Triton = 0.1
20191120_Triton_Dopt_plate_2_t2.CSV Trace 7 Protein conc = 10.0, Vol = 20.0, Triton = 0.1
20191120_Triton_Dopt_plate_1.CSV Trace 8 Protein conc = 10.0, Vol = 20.0, Triton = 0.1
20191120_Triton_Dopt_plate_2.CSV Trace 8 Protein conc = 10.0, Vol = 20.0, Triton = 0.1
20191120_Triton_Dopt_plate_1_t2.CSV Trace 8 Protein conc = 10.0, Vol = 20.0, Triton = 0.1
20191120_Triton_Dopt_plate_2_t2.CSV Trace 8 Protein conc = 10.0, Vol = 20.0, Triton = 0.1
20191120_Triton_Dopt_plate_1.CSV Trace 9 Protein conc = 20.0, Vol = 20.0, Triton = 0.1
20191120_Triton_Dopt_plate_2.CSV Trace 9 Protein conc = 20.0, Vol = 20.0, Triton = 0.1
20191120_Triton_Dopt_plate_1_t2.CSV Trace 9 Protein conc = 20.0, Vol = 20.0, Triton = 0.1
20191120_Triton_Dopt_plate_2_t2.CSV Trace 9 Protein conc = 20.0, Vol = 20.0, Triton = 0.1
20191120_Triton_Dopt_plate_1.CSV Trace 10 Protein conc = 20.0, Vol = 20.0, Triton = 0.1
20191120_Triton_Dopt_plate_2.CSV Trace 10 Protein conc = 20.0, Vol = 20.0, Triton = 0.1
20191120_Triton_Dopt_plate_1_t2.CSV Trace 10 Protein conc = 20.0, Vol = 20.0, Triton = 0.1
20191120_Triton_Dopt_plate_2_t2.CSV Trace 10 Protein conc = 20.0, Vol = 20.0, Triton = 0.1
20191120_Triton_Dopt_plate_1.CSV Trace 11 Protein conc = 16.36, Vol = 26.36, Triton = 0.1
20191120_Triton_Dopt_plate_2.CSV Trace 11 Protein conc = 16.36, Vol = 26.36, Triton = 0.1
20191120_Triton_Dopt_plate_1_t2.CSV Trace 11 Protein conc = 16.36, Vol = 26.36, Triton = 0.1
20191120_Triton_Dopt_plate_2_t2.CSV Trace 11 Protein conc = 16.36, Vol = 26.36, Triton = 0.1
20191120_Triton_Dopt_plate_1.CSV Trace 12 Protein conc = 16.36, Vol = 26.36, Triton = 0.1
20191120_Triton_Dopt_plate_2.CSV Trace 12 Protein conc = 16.36, Vol = 26.36, Triton = 0.1
20191120_Triton_Dopt_plate_1_t2.CSV Trace 12 Protein conc = 16.36, Vol = 26.36, Triton = 0.1
20191120_Triton_Dopt_plate_2_t2.CSV Trace 12 Protein conc = 16.36, Vol = 26.36, Triton = 0.1
20191120_Triton_Dopt_plate_1.CSV Trace 13 Protein conc = 13.64, Vol = 30.0, Triton = 0.01
20191120_Triton_Dopt_plate_2.CSV Trace 13 Protein conc = 13.64, Vol = 30.0, Triton = 0.01
20191120_Triton_Dopt_plate_1_t2.CSV Trace 13 Protein conc = 13.64, Vol = 30.0, Triton = 0.01
20191120_Triton_Dopt_plate_2_t2.CSV Trace 13 Protein conc = 13.64, Vol = 30.0, Triton = 0.01
20191120_Triton_Dopt_plate_1.CSV Trace 14 Protein conc = 13.64, Vol = 30.0, Triton = 0.01
20191120_Triton_Dopt_plate_2.CSV Trace 14 Protein conc = 13.64, Vol = 30.0, Triton = 0.01
20191120_Triton_Dopt_plate_1_t2.CSV Trace 14 Protein conc = 13.64, Vol = 30.0, Triton = 0.01
20191120_Triton_Dopt_plate_2_t2.CSV Trace 14 Protein conc = 13.64, Vol = 30.0, Triton = 0.01
20191120_Triton_Dopt_plate_1.CSV Trace 15 Protein conc = 20.0, Vol = 30.0, Triton = 0.07
20191120_Triton_Dopt_plate_2.CSV Trace 15 Protein conc = 20.0, Vol = 30.0, Triton = 0.07
20191120_Triton_Dopt_plate_1_t2.CSV Trace 15 Protein conc = 20.0, Vol = 30.0, Triton = 0.07
20191120_Triton_Dopt_plate_2_t2.CSV Trace 15 Protein conc = 20.0, Vol = 30.0, Triton = 0.07
20191120_Triton_Dopt_plate_1.CSV Trace 16 Protein conc = 20.0, Vol = 30.0, Triton = 0.07
20191120_Triton_Dopt_plate_2.CSV Trace 16 Protein conc = 20.0, Vol = 30.0, Triton = 0.07
20191120_Triton_Dopt_plate_1_t2.CSV Trace 16 Protein conc = 20.0, Vol = 30.0, Triton = 0.07
20191120_Triton_Dopt_plate_2_t2.CSV Trace 16 Protein conc = 20.0, Vol = 30.0, Triton = 0.07
20191120_Triton_Dopt_plate_1.CSV Trace 17 Protein conc = 20.0, Vol = 22.73, Triton = 0.01
20191120_Triton_Dopt_plate_2.CSV Trace 17 Protein conc = 20.0, Vol = 22.73, Triton = 0.01
20191120_Triton_Dopt_plate_1_t2.CSV Trace 17 Protein conc = 20.0, Vol = 22.73, Triton = 0.01
20191120_Triton_Dopt_plate_2_t2.CSV Trace 17 Protein conc = 20.0, Vol = 22.73, Triton = 0.01
20191120_Triton_Dopt_plate_1.CSV Trace 18 Protein conc = 20.0, Vol = 22.73, Triton = 0.01
20191120_Triton_Dopt_plate_2.CSV Trace 18 Protein conc = 20.0, Vol = 22.73, Triton = 0.01
20191120_Triton_Dopt_plate_1_t2.CSV Trace 18 Protein conc = 20.0, Vol = 22.73, Triton = 0.01
20191120_Triton_Dopt_plate_2_t2.CSV Trace 18 Protein conc = 20.0, Vol = 22.73, Triton = 0.01
20191120_Triton_Dopt_plate_1.CSV Trace 19 Protein conc = 15.45, Vol = 20.0, Triton = 0.05
20191120_Triton_Dopt_plate_2.CSV Trace 19 Protein conc = 15.45, Vol = 20.0, Triton = 0.05
20191120_Triton_Dopt_plate_1_t2.CSV Trace 19 Protein conc = 15.45, Vol = 20.0, Triton = 0.05
20191120_Triton_Dopt_plate_2_t2.CSV Trace 19 Protein conc = 15.45, Vol = 20.0, Triton = 0.05
20191120_Triton_Dopt_plate_1.CSV Trace 20 Protein conc = 15.45, Vol = 20.0, Triton = 0.05
20191120_Triton_Dopt_plate_2.CSV Trace 20 Protein conc = 15.45, Vol = 20.0, Triton = 0.05
20191120_Triton_Dopt_plate_1_t2.CSV Trace 20 Protein conc = 15.45, Vol = 20.0, Triton = 0.05
20191120_Triton_Dopt_plate_2_t2.CSV Trace 20 Protein conc = 15.45, Vol = 20.0, Triton = 0.05
In [2]:
plt.hist(metrics['Noise']/metrics['Vmax'])
#plt.hist(metrics['Vmax'])
plt.show()
Curve fitting looks good¶
Now I'll do some histograms and maybe a model
In [3]:
metrics
Out[3]:
| Km | Loss | Noise | Protein Conc | Protein Vol | Time | Triton | Vmax | |
|---|---|---|---|---|---|---|---|---|
| 0 | 9.12354 | 0.0808028 | 0.0552286 | 10 | 20 | 17:59:22 | 0.01 | 0.101765 |
| 0 | 5.73576 | 0.102441 | 0.0448017 | 10 | 20 | 17:52:30 | 0.01 | 0.081759 |
| 0 | 11.0252 | 0.0679967 | 0.0427759 | 10 | 20 | 19:14:17 | 0.01 | 0.0958207 |
| 0 | 5.17611 | 0.0949312 | 0.045031 | 10 | 20 | 19:24:09 | 0.01 | 0.0802563 |
| 0 | 41.1611 | 0.0369353 | 0.0237407 | 10 | 20 | 17:59:22 | 0.01 | 0.0786931 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 0 | 96.0899 | 0.888629 | 0.0318684 | 15.4545 | 20 | 19:24:09 | 0.0509091 | 0.103195 |
| 0 | 45.1085 | 0.00312996 | 0.00494032 | 15.4545 | 20 | 17:59:22 | 0.0509091 | 0.103515 |
| 0 | 114.496 | 1.7591 | 0.0379043 | 15.4545 | 20 | 17:52:30 | 0.0509091 | 0.0955621 |
| 0 | 44.0459 | 0.00243241 | 0.00415495 | 15.4545 | 20 | 19:14:17 | 0.0509091 | 0.101184 |
| 0 | 37.8077 | 1.60299 | 0.0387216 | 15.4545 | 20 | 19:24:09 | 0.0509091 | 0.0838076 |
80 rows × 8 columns
In [4]:
metrics.reset_index(inplace=True,drop=True)
metrics['R^2'] = 1-metrics['Loss']
metrics.to_csv('Triton_Dopt_metrics.csv')
In [5]:
def PlotHist(df, colname):
plt.figure(figsize = (5,4))
plt.hist(df[colname],bins=20,alpha=0.8)
plt.xlabel(colname)
plt.ylabel('Frequency')
plt.show()
for i in ['Km','Vmax','R^2']:
PlotHist(metrics,i)
There are some outliers, which I think was a result of the multidrop mis-dispensing. We noticed that some protein/buffer had been dispensed around the wells in plate 2, probably because the multidrop nozzles tarted foaming between priming with air and protein
In [6]:
for i in ['Km','Vmax','R^2']:
PlotHist(metrics.loc[metrics['R^2']>0.5],i)
In [32]:
plt.figure(figsize = (5,4))
plt.hist(metrics['Vmax']/metrics['Protein Vol']*10,bins=20,alpha=0.8)
plt.xlabel('Normalized Vmax')
plt.ylabel('Frequency')
plt.show()
In [49]:
plt.hist(metrics['Vmax']/metrics['Protein Vol'])
plt.show()
In [66]:
import torch
import torch.nn as nn
from tqdm import tqdm
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
x = metrics[['Protein Conc', 'Protein Vol','Triton']]
y = pd.DataFrame(metrics['Vmax']/metrics['Protein Vol']) #metrics[['Noise']]/metrics[['Vmax']]
scaler = MinMaxScaler()
x = scaler.fit_transform(x)
y = scaler.fit_transform(y)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.33)
x_train, x_test, y_train, y_test = torch.tensor(x_train,dtype=torch.float),\
torch.tensor(x_test,dtype=torch.float), \
torch.tensor(y_train,dtype=torch.float), \
torch.tensor(y_test,dtype=torch.float)
def r_squared_loss(y,yh):
residuals = y-yh
ss_res = (residuals**2).sum()
ss_tot = ((y-y.mean())**2).sum()
r_squared = 1 - (ss_res / ss_tot)
return 1-r_squared
class dataset(Dataset):
def __init__(self, x,y):
self.x = x
self.y = y
def __getitem__(self, index):
return self.x[index], self.y[index]
def __len__(self):
return self.x.shape[0]
train_loader = DataLoader(dataset=dataset(x_train,y_train),
batch_size=8,
shuffle=True,
num_workers=0)
lin = nn.Linear(3,1)
loss_fn = r_squared_loss#nn.MSELoss()
opt = torch.optim.Adam(lin.parameters(), lr=1e-3)
loss_record = []
for epoch in tqdm(range(5000)):
for i, batch in enumerate(train_loader, 0):
x_,y_ = batch
yhat = lin.forward(x_)
loss = loss_fn(y_,yhat)
loss.backward()
opt.step()
opt.zero_grad()
loss_record.append(loss.item())
print(f'Training loss = {round(loss.item(),4)}')
yhat = lin.forward(x_test)
err = loss_fn(y_test,yhat)
print(f'Test loss = {round(err.item(),4)}')
plt.figure(figsize=(5,5))
plt.plot(np.array(loss_record))
plt.xlabel('Iteration')
plt.ylabel('Mean Squared Error')
plt.show()
plt.figure(figsize=(4,4))
plt.scatter(yhat.detach(),y_test.detach(),alpha = 1, s =5)
plt.plot([0,1],[0,1])
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.xlim((0,1))
plt.ylim((0,1))
plt.show()
print('Rsq = ',plate1_t1.r_squared_torch(yhat,y_test).item())
100%|██████████| 5000/5000 [00:20<00:00, 244.74it/s]
Training loss = 0.6237 Test loss = 0.7154
Rsq = -1.527360200881958
In [61]:
yhat
Out[61]:
tensor([[0.4918],
[0.4711],
[0.4671],
[0.6137],
[0.4711],
[0.7646],
[0.4671],
[0.6332],
[0.4711],
[0.4711],
[0.5616],
[0.4711],
[0.4002],
[0.4134],
[0.6718],
[0.6137],
[0.6718],
[0.4671],
[0.4711],
[0.4134],
[0.4671],
[0.6137],
[0.4918],
[0.6137],
[0.5616],
[0.4711],
[0.4002]], grad_fn=<MmBackward>)
In [60]:
from torch.utils.data import Dataset, DataLoader
poly = PolynomialFeatures(2)
x_poly = poly.fit_transform(x)
# Train test split
x_train, x_test, y_train, y_test = train_test_split(x_poly, y, test_size=0.33)
# make into tensors
x_train, x_test, y_train, y_test = torch.tensor(x_train,dtype=torch.float),\
torch.tensor(x_test,dtype=torch.float), \
torch.tensor(y_train,dtype=torch.float), \
torch.tensor(y_test,dtype=torch.float)
class dataset(Dataset):
def __init__(self, x,y):
self.x = x
self.y = y
def __getitem__(self, index):
return self.x[index], self.y[index]
def __len__(self):
return self.x.shape[0]
train_loader = DataLoader(dataset=dataset(x_train,y_train),
batch_size=8,
shuffle=True,
num_workers=0)
polymodel = nn.Linear(x_train.shape[1],1,bias = False)
opt = torch.optim.Adam(polymodel.parameters(),lr = 1e-3)
loss_fn = nn.MSELoss()
lossrecord = []
for epoch in tqdm(range(1000)):
for i, batch in enumerate(train_loader, 0):
x_,y_ = batch
yhat = polymodel.forward(x_)
loss = loss_fn(y_,yhat)
loss.backward()
opt.step()
opt.zero_grad()
loss_record.append(loss.item())
print(f'Training loss = {round(loss.item(),4)}')
yhat = polymodel.forward(x_test)
err = loss_fn(y_test,yhat)
print(f'Test loss = {round(err.item(),4)}')
plt.figure(figsize=(5,5))
plt.plot(np.array(loss_record))
plt.xlabel('Iteration')
plt.ylabel('Mean Squared Error')
plt.show()
plt.figure(figsize=(4,4))
plt.scatter(yhat.detach(),y_test.detach(),alpha = 1, s =5)
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.xlim((0,1))
plt.ylim((0,1))
plt.show()
print('Rsq = ',plate1_t1.r_squared_torch(yhat,y_test))
100%|██████████| 1000/1000 [00:03<00:00, 305.75it/s]
Training loss = 0.0057 Test loss = 0.0542
Rsq = tensor(-5.1425, grad_fn=<RsubBackward1>)